# -*- mode: Perl -*-
# /=====================================================================\ #
# |  BibTeX.pool                                                        | #
# | Implementation for LaTeXML                                          | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;
use Text::Balanced qw(extract_bracketed extract_delimited);

LoadPool('LaTeX');
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Design Notes
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Documents consulted:
#   BibTeX documents
#   amsrefs documents
#   biblatex
# with the result that there are a large number of semi-standard
# entry types and fields that might need to be supported.
#======================================================================
# I hate to reinvent "Bibliography as XML",
# since I'm hardly expert in the issues involved.
# However, it is hard to see how best to adapt another
# Bibliographic XML schema. The first thing is that
# the content model of leaf nodes would need to be changed
# from "text" to Inline.model (usually; maybe even occasionally Block (eg. abstract?))
#
# Some candidate formats that google "bibtex xml" turns up:
#  BibTeXML http://bibtexml.sourceforge.net/
#  MODS (LOC) See http://www.scripps.edu/~cdputnam/software/bibutils/
#     and  http://www.loc.gov/standards/mods/
# The latter, at least, can deal with more structure:
# eg. the book in an inbook is itself effectively an embedded bibentry,
# rather than just fields like booktitle, ...
# Is this sort of inference worthwhile?
#======================================================================
# Small questions:
#  * Do we want to Warn about missing Required Fields?
#  * Do we sort fields?  (presumably based on entry type)
#  * How many `common' extra fields should we handle?
#    New elements? or microformats (ie. overload w/a role attribute?)
#  * Do we need a naming convention for customization of BibTeX?
#     *.bst.ltxml is misleading? (bst deals more with bibitem style)
#      [Currently MakeBibliography does the job that a bst does!]
#      [or could we embed style info in the *.bib.xml ?]
# Bigger question:
#   * Should this module focus on bibentry generation
#   * Or generate bibentry+bibitem pairs?
#     [still can't do a/b... suffixing until inclusion time!]
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Customizability
# There are 3 mechanisms to customize the conversion
# of BibTeX entries: defining an alias for the entry type;
# handlers for the fields, and handlers to complete the entry.
#
# ALIAS: To cause entries of type foo to be treated as of type bar, define
#   \@bib@entry@foo@alias => bar
# ("foo" will be called the "origtype" in the following).
#
# HANDLERS: Fields are processed by invoking (on the field value)
# the first of the following macro/constructors that is defined:
#   \bib@field@<type>@<field>
#   \bib@field@<origtype>@<field>  (if an alias was defined)
#   \bib@field@default@<field>
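# If no handler is defined,
#   \bib@field@default@default{field}{value}
# is used; it is initially defined to invoke \bib@field@unknownasdata,
# which copies the field's raw value into <ltx:bib-data role='field'>.
# To ignore such fields instead, Let it to \bib@field@@ignore.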
# An alternative would be
#   \bib@field@@random  stores as <ltx:bib-random field='field'>...
#
#
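# COMPLETION: Finally, additions to the entry can be made by defining
# a 'complete' macro/constructor. ALL of the following
# are run, if they are defined:
#   \bib@entry@<type>@complete
#   \bib@entry@<origtype>@complete  (if an alias was defined)
#   \bib@entry@default@complete
#
# PREPARATION: Similarly, before the entry is created, ALL of the
# following 'prepare' macros are run, if they are defined:
#   \bib@entry@<type>@prepare
#   \bib@entry@<origtype>@prepare  (if an alias was defined)
#   \bib@entry@default@prepare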
#
# Defining Handlers and Completers:
#   The argument for handlers is the field's value.
# The parameter types Semiverbatim and Digested are often useful here.
# Semiverbatim for fields that may contain something like a url.
# Digested is good for general fields that may contain macros
# (like \url) that change catcodes; Digested acts somewhat as if
# \protect had been strategically scattered throughout.
#
# Within any of these handlers or completers, you may want to use
#     currentBibEntryField('fieldname')
# to get the string value (rather than digested value) for
# the given field, or you may use it to test for the presence
# of some other field.
# BUT: be careful, it looks up from a hash, and so doesn't work
# correctly when there are multiple values for a given field!!!
# [This may need to be revisited]
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# \ProcessBibTeXEntry{key}
# Entry point for processing one bibliographic entry, identified by its key.
# It expands into two steps, each receiving the same key:
#   \bibentry@prepare{key}  runs the entry's 'prepare' macros;
#   \bibentry@create{key}   builds a sequence of commands that will process the
#                           entry and its fields, depending on what sorts of
#                           aliases and commands have been defined.
DefMacro('\ProcessBibTeXEntry Semiverbatim',
  '\bibentry@prepare{#1}\bibentry@create{#1}');

# \bibentry@prepare{key}
# Runs every defined 'prepare' macro for the entry stored under
# BIBENTRY@<normalized key>.  The candidates, in order, are those for the
# effective (aliased) type, the original type (only when an alias changed it),
# and the default.  They are digested inside a group in which
# CURRENT@BIBKEY is bound to the normalized key.  Produces no output.
DefPrimitive('\bibentry@prepare Semiverbatim', sub {
    my ($stomach, $key) = @_;
    $key = NormalizeBibKey($key);
    my $entry    = LookupValue('BIBENTRY@' . $key);
    my $origtype = $entry->getType;

    # The effective type is the digested text of \bib@entry@<origtype>@alias, when defined.
    my $alias_cs = T_CS('\bib@entry@' . $origtype . '@alias');
    my $type     = (IsDefined($alias_cs) ? ToString(Digest($alias_cs)) : $origtype);

    # Candidate preparers, most specific first.
    my @preparers = (T_CS('\bib@entry@' . $type . '@prepare'));
    if ($origtype ne $type) {
      push(@preparers, T_CS('\bib@entry@' . $origtype . '@prepare')); }
    push(@preparers, T_CS('\bib@entry@default@prepare'));

    # invoke ALL entry preparers that are actually defined
    $stomach->bgroup;
    AssignValue('CURRENT@BIBKEY' => $key);
    Digest(Tokens(grep { IsDefined($_) } @preparers));
    $stomach->egroup;
    return; });

# \bibentry@create{key}
# Assembles, as lines of TeX source text, a {bib@entry} environment for the
# entry stored under BIBENTRY@<normalized key>, and opens a new Mouth on that
# text so that the gullet reads it.  The text consists of:
#   * \begin{bib@entry}{<type>}{<key>}
#   * one handler invocation per field (the most specific handler defined),
#   * every defined 'complete' macro,
#   * \end{bib@entry}
# Handlers are invoked through \csname...\endcsname since their names contain '@'.
# Note that field names and values are interpolated into the text as they are.
DefPrimitive('\bibentry@create Semiverbatim', sub {
    my ($stomach, $key) = @_;
    $key = NormalizeBibKey($key);
    my $entry      = LookupValue('BIBENTRY@' . $key);
    my $origtype   = $entry->getType;
    my $type       = $origtype;
    my $alias_defn = T_CS('\bib@entry@' . $type . '@alias');
    # When an alias macro is defined for the original type, its digested text becomes the effective type.
    if (IsDefined($alias_defn)) {
      $type = ToString(Digest($alias_defn)); }

    my @tex = (join('', '\begin{bib@entry}{', $type, '}{', $key, '}'));
    # Add the most specific handler for each field
    # (each element returned by getFields is a [field, value] pair)
    foreach my $pair ($entry->getFields) {
      my ($field, $value) = @$pair;
      # Candidates in order: effective type, original type (if aliased), default; take the first defined.
      my ($handler) = grep { IsDefined(T_CS('\\' . $_)) }
        'bib@field@' . $type . '@' . $field,
        ($origtype ne $type
        ? ('bib@field@' . $origtype . '@' . $field) : ()),
        'bib@field@default@' . $field;
      if ($handler) {
        push(@tex, join('', '\csname ', $handler, '\endcsname{', $value, '}')); }
      else {
        # No handler for this field: the catch-all receives both the field name and the value.
        push(@tex, join('', '\csname bib@field@default@default\endcsname{', $field, '}{', $value, '}')); } }
    # Add ALL entry completers
    foreach my $completer (grep { IsDefined(T_CS('\\' . $_)) }
      'bib@entry@' . $type . '@complete',
      ($origtype ne $type
        ? ('bib@entry@' . $origtype . '@complete') : ()),
      'bib@entry@default@complete') {
      push(@tex, join('', '\csname ', $completer, '\endcsname')); }
    my $tex = join("\n", @tex, '\end{bib@entry}');
    # Hand the generated source to the gullet; the primitive itself yields nothing.
    $stomach->getGullet->openMouth(LaTeXML::Core::Mouth->new($tex));
    (); });

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Processing Entries
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Math ID's within the bibliography:
# composed of the bibliography's ID, followed by ".XM" and the arabic value of the @XMARG counter.
DefMacro('\the@XMARG@ID', '\thebibliography@ID.XM\arabic{@XMARG}');

# {bibtex@bibliography}
# Environment that wraps the generated entries: it constructs an
# <ltx:bibliography> holding an <ltx:title> and an <ltx:biblist> around the body.
# The attributes (id, bibstyle, citestyle, sort) and the title & titlefont
# are taken from the whatsit's properties.
# NOTE(review): those properties are presumably set by beforeDigestBibliography
# and beginBibliography, which are defined elsewhere -- confirm.
DefEnvironment('{bibtex@bibliography}',
  "<ltx:bibliography xml:id='#id' "
    . "bibstyle='#bibstyle' citestyle='#citestyle' sort='#sort'>"
    . "<ltx:title font='#titlefont' _force_font='1'>#title</ltx:title>"
    . "<ltx:biblist>#body</ltx:biblist>"
    . "</ltx:bibliography>",
  beforeDigest => sub {
    beforeDigestBibliography(); },
  # $_[1] is the whatsit for the beginning of the environment.
  afterDigestBegin => sub { beginBibliography($_[1]); });

# {bib@entry}{type}{key}
# Environment for a single entry; constructs <ltx:bibentry> around the body
# (ie. around whatever the field handlers and completers produce).
# After the \begin is digested:
#   * CURRENT@BIBKEY is set to the normalized 2nd argument, so that
#     currentBibEntry() and friends refer to this entry within the body;
#   * the 'key' property is taken from the stored entry, and whatever
#     RefStepCounter('@bibitem') returns is added to the properties
#     (presumably including the 'id' used for xml:id -- confirm).
DefEnvironment('{bib@entry} Semiverbatim Semiverbatim',
  "<ltx:bibentry type='#1' key='#key' xml:id='#id'>#body</ltx:bibentry>",
  afterDigestBegin => sub {
    AssignValue('CURRENT@BIBKEY' => NormalizeBibKey($_[1]->getArg(2)));
    my $entry = currentBibEntry();
    $_[1]->setProperties(key => $entry->getKey, RefStepCounter('@bibitem')); });

# Returns the key of the entry currently being processed, ie. the value
# stored under CURRENT@BIBKEY (assigned by \bibentry@prepare and {bib@entry}).
sub currentBibKey {
  return LookupValue('CURRENT@BIBKEY'); }

# Returns the entry object stored under BIBENTRY@<current key>,
# ie. the entry currently being processed.
sub currentBibEntry {
  my $key = currentBibKey();
  return LookupValue('BIBENTRY@' . $key); }

# Returns the result of getField($field) on the entry currently being processed.
sub currentBibEntryField {
  my ($field) = @_;
  my $entry = currentBibEntry();
  return $entry->getField($field); }

# Returns the result of getRawField($field) on the entry currently being processed.
sub currentBibEntryRawField {
  my ($field) = @_;
  my $entry = currentBibEntry();
  return $entry->getRawField($field); }

#======================================================================
# Default preparer, run for every entry type:
# inherit the date-related fields from the crossref'ed entry, if any.
DefMacro('\bib@entry@default@prepare', sub {
    copyCrossrefFields(qw(date year month day)); });

# Default completer, run for every entry type:
# synthesize the MR and Zbl review data, then invoke \bib@@origbibentry.
DefMacro('\bib@entry@default@complete',
  '\bib@synthesize@mr\bib@synthesize@zbl\bib@@origbibentry');

# copyCrossrefFields(@fields)
# For each named field that the current entry lacks, copy the value from the
# entry named by the current entry's 'crossref' field.  The regular and raw
# forms of each field are checked and copied independently.
# Nothing happens when there is no crossref field, or when the cross-referenced
# entry is not found; false values in the cross-referenced entry are not copied.
sub copyCrossrefFields {
  my (@fields) = @_;
  my $entry = currentBibEntry();
  my $xref  = $entry->getField('crossref');
  return unless $xref;
  my $xentry = LookupValue('BIBENTRY@' . NormalizeBibKey($xref));
  return unless $xentry;
  foreach my $field (@fields) {
    # regular form of the field
    if (!defined $entry->getField($field)) {
      my $value = $xentry->getField($field);
      $entry->addField($field, $value) if $value; }
    # raw form of the field
    if (!defined $entry->getRawField($field)) {
      my $rawvalue = $xentry->getRawField($field);
      $entry->addRawField($field, $rawvalue) if $rawvalue; } }
  return; }
#======================================================================
# Supporters

# \bib@@field{tag}[keyvals]{content}
# General purpose constructor used by most field handlers:
# inserts an element named by tag (eg. ltx:bib-title) containing the digested content;
# the optional keyvals, if given, supply the element's attributes.
DefConstructor('\bib@@field {} OptionalKeyVals Digested', sub {
    my ($document, $tag, $attr, $content) = @_;
    $document->insertElement(ToString($tag), $content, ($attr ? $attr->getHash : ())); });

# \bib@addtype{type}
# Supplies a default for the 'type' field: expands to nothing when the current
# entry already has a type field, otherwise to \bib@field@default@type{type}.
# Hmm, should this be something like an "add default field value"?
DefMacro('\bib@addtype{}', sub {
    my ($gullet, $type) = @_;
    return () if currentBibEntryField('type');
    return Invocation(T_CS('\bib@field@default@type'), $type)->unlist; });

# bibAddToContainer($document, $tag, $data, %attr)
# Adds $data to the element $tag having the given attributes, within the
# enclosing ltx:bibentry or ltx:bib-related (found from the current node).
# If such an element is already present there, $data (if any) is absorbed
# into it and the current node is restored afterwards;
# otherwise a new element is inserted with $data as its content.
sub bibAddToContainer {
  my ($document, $tag, $data, %attr) = @_;
  my $savednode = $document->getNode;
  my $entry     = $document->findnode(
    'ancestor-or-self::ltx:bibentry | ancestor-or-self::ltx:bib-related',
    $savednode);

  # XPath for the container: the tag, with one predicate test per attribute (in sorted order).
  my @tests = map { "\@$_='" . ToString($attr{$_}) . "'" } sort keys %attr;
  my $xpath = (@tests ? $tag . '[' . join(' and ', @tests) . ']' : $tag);

  my $container = $document->findnode($xpath, $entry);
  if (!$container) {
    $document->insertElement($tag, $data, %attr);
    return; }
  $document->setNode($container);
  $document->absorb($data) if $data;
  $document->setNode($savednode);
  return; }

# Insert the (digested) 3rd argument into the bib-related node, having the
# given type and role, in the current bibentry; see bibAddToContainer.
# \bib@addto@related{type}{role}{content}
# Field handlers typically end with \bib@addto@related{type}{role}\command,
# so that the content is whatever \command (with its arguments) digests to.
DefConstructor('\bib@addto@related {}{} Digested', sub {
    my ($document, $type, $role, $data) = @_;
    bibAddToContainer($document, 'ltx:bib-related', $data, type => $type, role => $role); });

# \bib@@@name{role}{name}
# wraps a single name in <ltx:bib-name>, with the role (eg. author, editor) as attribute.
DefConstructor('\bib@@@name{}{}',
  "<ltx:bib-name role='#1'>#2</ltx:bib-name>");
# This constructor merely serves to collect a set of names into one Whatsit
# so that \bib@addto@related can capture it.
DefConstructor('\bib@@@names{}', "#1");
# \bib@@names{role}{names}
# Expands to \bib@@@names{ \bib@@@name{role}{name}... }, with one \bib@@@name for
# each item that processBibNameList returns for the names (converted back to a
# string by UnTeX).
# NOTE(review): processBibNameList is defined elsewhere; presumably it splits
# and normalizes a BibTeX name list -- confirm.
# (The commented-out lines are an earlier variant that read the field's
# string value from the current entry instead of using the argument.)
DefMacro('\bib@@names{}{}', sub {
##    my ($gullet, $field, $ignore) = @_;
    my ($gullet, $field, $names) = @_;
    (T_CS('\bib@@@names'), T_BEGIN,
      (map { Invocation(T_CS('\bib@@@name'), $field, $_) }
##          processBibNameList(currentBibEntryField(ToString($field)))),
          processBibNameList(UnTeX($names))),
      T_END); });

# Now, we define
# \bib@@title
# which will do appropriate case conversion... but what's appropriate?
# Setting BibTeX_title_case to one of the following chooses.
# (the default, like most BibTeX styles, is capitalize1)
# Cases:
# * asis        : leave title as is
# * capitalize1 : downcase all, then Capitalize 1st word only
# * capitalize  : downcase, then capitalize ALL words (?)
#    (but really would ignore articles, prepositions...?)
# * uppercase   : convert to uppercase.
# * lowercase   : convert to lowercase.
#
# \bib@@title{field}{tag}{value}
# The 3rd argument (the tokenized value) is ignored; instead the string
# value of the named field is fetched from the current entry, recased
# piece by piece, re-tokenized and passed on as \bib@@field{tag}{recased}.
# Brace groups {...} and math $...$ are copied through without recasing.
# NOTE: where a word is capitalized, only ucfirst is applied; the remainder of
# that word is NOT downcased, in spite of the descriptions of the modes above.

DefMacro('\bib@@title{}{}{}', sub {
    my ($gullet, $field, $tag, $ignoretitle) = @_;
    my $title = currentBibEntryField(ToString($field));              # Get it in raw string form.
    my $mode  = LookupValue('BibTeX_title_case') || 'capitalize1';
    my $recap = '';
    # $wb : true when we are at a word boundary (the next word piece starts a new word)
    # $wc : count of the words (and brace groups) started so far
    my ($wb, $wc) = (1, 0);
    # Consume $title from the front, accumulating the recased text in $recap.
    while ($title ne "") {
      # Whitespace: copied as is; marks a word boundary.
      if ($title =~ s/^(\s+)//) {
        $recap .= $1; $wb = 1; }
      # A run of word characters and/or backslash sequences (\word or \<char>)
      elsif ($title =~ s/^((?:\w|\\(?:\w+|.))+)//) {
        if ($mode eq 'asis') {
          $recap .= $1; }
        elsif ($mode eq 'uppercase') {
          $recap .= uc($1); }
        # Downcase if: continuing a word; or a later word in capitalize1; or lowercase mode.
        elsif (($wb == 0)
          || (($mode eq 'capitalize1') && ($wc > 0))
          || ($mode eq 'lowercase')) {
          $recap .= lc($1); }
        # Else, starting a word that should be capitalized.
        elsif (($mode eq 'capitalize')
          || (($mode eq 'capitalize1') && ($wc == 0))) {
          $recap .= ucfirst($1); }
        $wc++ if $wb;
        $wb = 0; }
      # Brace group: extract the balanced {...} (removing it from $title) and copy it unchanged.
      elsif ($title =~ /^\{/) {
        my $t = extract_bracketed($title, '{}');
        if (!defined $t) {
          # Unbalanced: report, then drop the { so that the loop makes progress.
          Error('expected', '}', $gullet, "Expected balancing } in bibliographic title");
          $title =~ s/^\{//; }
        else {
          $wc++ if $wb;
          $recap .= $t; $wb = 0; } }
      # Math: extract the $...$ (removing it from $title) and copy it unchanged;
      # note that, unlike a brace group, this does not increment the word count.
      elsif ($title =~ /^\$/) {
        my $t = extract_delimited($title, '$');
        if (!defined $t) {
          # Unbalanced: report, then drop the $ so that the loop makes progress.
          Error('expected', '$', $gullet, "Expected balancing \$ in bibliographic title");
          $title =~ s/^\$//; }
        else {
          $recap .= $t; $wb = 0; } }
      # Any other single character (eg. punctuation): copied as is; marks a word boundary.
      elsif ($title =~ s/^(.)//) {
        $recap .= $1; $wb = 1; } }
    Invocation(T_CS('\bib@@field'), $tag, undef, Tokenize($recap)); });

# \bib@@booktitle{tag}{value}
# I'd thought booktitle were treated like title, but I think I was mistaken.
# So, no case conversion is applied: the value is simply passed to \bib@@field.
#DefMacro('\bib@@booktitle{}{}', '\bib@@title{booktitle}{#1}{#2}');
DefMacro('\bib@@booktitle{}{}', '\bib@@field{#1}{#2}');

#======================================================================
# Default field handlers

# \bib@field@@ignore{field}{value}
# This ignores the field: both arguments are read, and it expands to nothing.
DefMacro('\bib@field@@ignore Verbatim Verbatim', '');

# \bib@field@default@default{field}{value}
# The handler used for fields that have no handler of their own.
# By default, we'll copy unrecognized fields to bib-data,
# Only the field name is passed along; \bib@field@unknownasdata then fetches
# the field's string value from the current entry (after digestion)
# and stores it as the 'rawdata' property used for the element's content.
DefMacro('\bib@field@default@default Verbatim Verbatim', '\bib@field@unknownasdata{#1}'); # IGNORE the tokenized data.
DefConstructor('\bib@field@unknownasdata Verbatim',
  "<ltx:bib-data role='#1'>#rawdata</ltx:bib-data>",
  afterDigest => sub {
    my $field = ToString($_[1]->getArg(1));
    $_[1]->setProperty(rawdata => currentBibEntryField($field)); });

# OTOH, if you'd like to simply ignore them, do this:
# Let('\bib@field@default@default','\bib@field@@ignore');

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# The Standard BibTeX Entries
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#======================================================================
# Article
#  required: author,title,journal, year
#  optional: volume,number,pages,month,note
# An article gets a related item for the Journal.

# Before the entry is created, the fields listed here are inherited from
# the crossref'ed entry (if any) when this entry lacks them; see copyCrossrefFields.
DefMacro('\bib@entry@article@prepare', sub { copyCrossrefFields(qw(author title journal)); });

# journal : the journal that an article is part of
# The journal's name becomes an ltx:bib-title within
# an ltx:bib-related having type=journal, role=host.
DefMacro('\bib@field@article@journal',
  '\bib@addto@related{journal}{host}\bib@@field{ltx:bib-title}');

#======================================================================
# Book
#  required: author or editor, title, publisher, year
#  optional: volume or number, series, address, edition, month, note

# Inherit these fields from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@book@prepare', sub { copyCrossrefFields(qw(author editor title publisher)); });

#======================================================================
# Booklet
#  required: title
#  optional: author, howpublished, address, month, year, note

# Inherit the title from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@booklet@prepare', sub { copyCrossrefFields(qw(title)); });

#======================================================================
# conference
# Entries of type conference are treated as if of type inproceedings.
DefMacro('\bib@entry@conference@alias', 'inproceedings');

#======================================================================
# inbook
#   required: author or editor, title, chapter and/or pages, publisher, year
#   optional: volume or number, series, type, address, edition, month, note
# inbook gets a related item for the book, with fields:
#    editor, publisher, booktitle, volume or number, series, address, edition.
# Each of the handlers below puts its field into
# the ltx:bib-related having type=book, role=host.

# Inherit these fields from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@inbook@prepare', sub {
    copyCrossrefFields(qw(author editor title chapter pages publisher)); });
DefMacro('\bib@field@inbook@booktitle',
  '\bib@addto@related{book}{host}\bib@@booktitle{ltx:bib-title}');
DefMacro('\bib@field@inbook@editor',
  '\bib@addto@related{book}{host}\bib@@names{editor}');
DefMacro('\bib@field@inbook@publisher',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-publisher}');
DefMacro('\bib@field@inbook@number',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-part}[role=number]');
DefMacro('\bib@field@inbook@volume',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-part}[role=volume]');
DefMacro('\bib@field@inbook@series',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-part}[role=series]');
DefMacro('\bib@field@inbook@address',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-place}');
DefMacro('\bib@field@inbook@edition',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-edition}');

#======================================================================
# incollection
#  required: author, title, booktitle, publisher, year
#  optional: editor, volume or number, series, type, chapter, pages,
#    address, edition, month, note.
# incollection gets a related item book, with fields:
#    booktitle, publisher, editor, volume or number, series, address, edition
# Each of the field handlers below puts its field into
# the ltx:bib-related having type=book, role=host.

# Inherit these fields from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@incollection@prepare', sub {
    copyCrossrefFields(qw(author title booktitle publisher)); });

DefMacro('\bib@field@incollection@booktitle',
  '\bib@addto@related{book}{host}\bib@@booktitle{ltx:bib-title}');
DefMacro('\bib@field@incollection@editor',
  '\bib@addto@related{book}{host}\bib@@names{editor}');
DefMacro('\bib@field@incollection@publisher',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-publisher}');
DefMacro('\bib@field@incollection@number',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-part}[role=number]');
DefMacro('\bib@field@incollection@volume',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-part}[role=volume]');
DefMacro('\bib@field@incollection@series',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-part}[role=series]');
DefMacro('\bib@field@incollection@address',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-place}');
DefMacro('\bib@field@incollection@edition',
  '\bib@addto@related{book}{host}\bib@@field{ltx:bib-edition}');

#======================================================================
# inproceedings
#   required: author, title, booktitle, year
#   optional: editor, volume or number, series, pages, address, month,
#     organization, publisher, note.
# inproceedings gets a related item proceedings, with fields:
#   booktitle, editor,volume or number, series, address, organization, publisher
# NOTE: although address is listed above, no inproceedings-specific handler
# for it is defined in this section (so the default address handler applies).

# Inherit these fields from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@inproceedings@prepare', sub {
    copyCrossrefFields(qw(author title booktitle publisher)); });

# These go into the ltx:bib-related having type=proceedings, role=host.
DefMacro('\bib@field@inproceedings@booktitle',
  '\bib@addto@related{proceedings}{host}\bib@@booktitle{ltx:bib-title}');
DefMacro('\bib@field@inproceedings@editor',
  '\bib@addto@related{proceedings}{host}\bib@@names{editor}');
DefMacro('\bib@field@inproceedings@number',
  '\bib@addto@related{proceedings}{host}\bib@@field{ltx:bib-part}[role=number]');
DefMacro('\bib@field@inproceedings@volume',
  '\bib@addto@related{proceedings}{host}\bib@@field{ltx:bib-part}[role=volume]');
DefMacro('\bib@field@inproceedings@series',
  '\bib@addto@related{proceedings}{host}\bib@@field{ltx:bib-part}[role=series]');
DefMacro('\bib@field@inproceedings@organization',
  '\bib@addto@related{proceedings}{host}\bib@@field{ltx:bib-organization}');
DefMacro('\bib@field@inproceedings@publisher',
  '\bib@addto@related{proceedings}{host}\bib@@field{ltx:bib-publisher}');

# These (non-standard) fields describe the conference itself, and go into
# a separate ltx:bib-related having type=conference, role=event.
# conference & meeting are treated alike (as its title), as are location & place.
DefMacro('\bib@field@inproceedings@conference',
  '\bib@addto@related{conference}{event}\bib@@field{ltx:bib-title}');
DefMacro('\bib@field@inproceedings@meeting',
  '\bib@addto@related{conference}{event}\bib@@field{ltx:bib-title}');
DefMacro('\bib@field@inproceedings@location',
  '\bib@addto@related{conference}{event}\bib@@field{ltx:bib-place}');
DefMacro('\bib@field@inproceedings@place',
  '\bib@addto@related{conference}{event}\bib@@field{ltx:bib-place}');

#======================================================================
# manual
#   required: title,
#   optional: author, organization, address, edition, month, year, note.

# Inherit the title from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@manual@prepare', sub { copyCrossrefFields(qw(title)); });

#======================================================================
# mastersthesis
#   required: author, title, school, year,
#   optional: type, address, month, note
# mastersthesis (and phdthesis) are aliased to the type thesis,
# so the preparer is defined for thesis.

# Inherit these fields from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@thesis@prepare', sub { copyCrossrefFields(qw(author title school)); });

DefMacro('\bib@entry@mastersthesis@alias',    'thesis');
# Supply a default type, used only if the entry has no type field (see \bib@addtype).
DefMacro('\bib@entry@mastersthesis@complete', '\bib@addtype{Master\'s Thesis}');

#======================================================================
# misc
#   required: none
#   optional: author, title, howpublished, month, year, note

#======================================================================
# phdthesis
#   required: author, title, school, year,
#   optional: type, address, month, note.
# Treated as of type thesis (which has its own preparer), with
# a default type that is used only if the entry has no type field (see \bib@addtype).

DefMacro('\bib@entry@phdthesis@alias',    'thesis');
DefMacro('\bib@entry@phdthesis@complete', '\bib@addtype{Ph.D. Thesis}');

#======================================================================
# proceedings
#   required: title, year
#   optional: editor, volume or number, series, address, month,
#     organization, publisher, note

# Inherit the title from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@proceedings@prepare', sub { copyCrossrefFields(qw(title)); });

# booktitle, within a proceedings entry:
# serves as the title, but only when the entry has no title field of its own.
# The macro is declared without parameters, so the value is still in the input:
# it is either left for \bib@@field to read, or read here and discarded.
DefMacro('\bib@field@proceedings@booktitle', sub {
    my ($gullet) = @_;
    if (!currentBibEntryField('title')) {
      # No title: use this as the title.
      return (TokenizeInternal('\bib@@field{ltx:bib-title}')->unlist); }
    # Already have a title: skip over the value.
    $gullet->readArg;
    return (); });

#======================================================================
# techreport
#   required: author, title, institution, year
#   optional: type, number, address, month, note.
# Treated as of type report (hence the preparer is defined for report), with
# a default type that is used only if the entry has no type field (see \bib@addtype).
DefMacro('\bib@entry@techreport@alias', 'report');
# Inherit these fields from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@report@prepare', sub { copyCrossrefFields(qw(author title institution)); });
DefMacro('\bib@entry@techreport@complete', '\bib@addtype{Technical report}');

#======================================================================
# unpublished
#   required: author, title, note
#   optional: month, year

# Inherit these fields from the crossref'ed entry (if any) when missing; see copyCrossrefFields.
DefMacro('\bib@entry@unpublished@prepare', sub { copyCrossrefFields(qw(author title)); });

#======================================================================
# Common non-standard entry types.
# These various names for web resources are all treated as of type website.
DefMacro('\bib@entry@online@alias',     'website');
DefMacro('\bib@entry@electronic@alias', 'website');
DefMacro('\bib@entry@www@alias',        'website');
DefMacro('\bib@entry@webpage@alias',    'website');

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Processing Fields
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Default handlers

#======================================================================
# Agents...

# Each of these expands to \bib@@names{role}, which (reading the field's value as
# its 2nd argument) generates an ltx:bib-name, with the given role, for each name.

# author : The name(s) of the author(s); this gets NameList treatment
DefMacro('\bib@field@default@author',
  '\bib@@names{author}');

# editor : The name(s) of the editor(s); this gets NameList treatment
DefMacro('\bib@field@default@editor',
  '\bib@@names{editor}');

# translator : The names(s) of any translator(s); this gets NameList treatment
DefMacro('\bib@field@default@translator',
  '\bib@@names{translator}');

#======================================================================
# Titles

# title : the title of the work.
# This gets case conversion according to BibTeX_title_case; see \bib@@title.
DefMacro('\bib@field@default@title',
  '\bib@@title{title}{ltx:bib-title}');

# subtitle : stored as is (no case conversion).
DefMacro('\bib@field@default@subtitle',
  '\bib@@field{ltx:bib-subtitle}');

#======================================================================
# Origin info.

# date : iso formatted yyyy-mm-dd
#  year (& month, day) fields can generate this element, as well.
DefMacro('\bib@field@default@date',
  '\bib@@field{ltx:bib-date}[role=publication]');

# edition : the edition of a book
DefMacro('\bib@field@default@edition',
  '\bib@@field{ltx:bib-edition}');

# address : Publisher's address
DefMacro('\bib@field@default@address',
  '\bib@@field{ltx:bib-place}');

# publisher : the name of the publisher
DefMacro('\bib@field@default@publisher',
  '\bib@@field{ltx:bib-publisher}');

# NOTE: institution, organization and school all generate the same element, ltx:bib-organization.

# institution : an institution involved in publishing, (distinct from publisher?)
DefMacro('\bib@field@default@institution',
  '\bib@@field{ltx:bib-organization}');

# organization : a conference sponsor
DefMacro('\bib@field@default@organization',
  '\bib@@field{ltx:bib-organization}');

# school : The school where a thesis was written; analogous to institution
DefMacro('\bib@field@default@school',
  '\bib@@field{ltx:bib-organization}');

# status : stored as ltx:bib-status
DefMacro('\bib@field@default@status',
  '\bib@@field{ltx:bib-status}');

# year : the year of publication.
# Try to synthesize an ISO date, if at all possible.
# Note that the raw month may be a standard abbreviation, which is most safely translated to the number;
# otherwise the date may end up containing the month's name rather than its number.
# Note that month and day do not directly contribute entries;
# year incorporates them if found.
# Lookup table from (lowercased) month name to the month's number,
# stored as a string without zero-padding.
our %months = (    # Standard abbreviations (usually would appear in raw form of month field).
  jan => "1", feb => "2", mar => "3", apr => "4",  may => "5",  jun => "6",
  jul => "7", aug => "8", sep => "9", oct => "10", nov => "11", dec => "12",
  # English month names, perhaps in expanded form of month field
  # ("may" is its own abbreviation, so it is already listed above)
  january => "1", february => "2", march => "3", april => "4", june => "6",
  july => "7", august => "8", september => "9", october => "10", november => "11", december => "12");
# Is there any sane way of looking up month names from other languages; maybe something from babel?

# year handler: synthesizes a date of the form year[-month[-day]] from the
# current entry's year, month & day fields, and passes it to \bib@field@default@date.
# The macro's own argument is read, but not used.
DefMacro('\bib@field@default@year {}', sub {
    # Ignore this, if we've already been given a date field.
    return () if currentBibEntryField('date');

    my $isodate = currentBibEntryField('year');

    # Prefer the raw month, if it is a known name or abbreviation;
    # otherwise use the regular month field, translated to a number if recognized.
    my $month;
    my $rawmonth = currentBibEntryRawField('month');
    if ($rawmonth && $months{ lc($rawmonth) }) {
      $month = $months{ lc($rawmonth) }; }
    else {
      my $cooked = currentBibEntryField('month');
      if ($cooked) {
        $month = $months{ lc($cooked) } || $cooked; } }
    my $day = currentBibEntryField('day');

    # Pad a single digit with a leading zero; anything else is left as is.
    my $pad = sub {
      my ($part) = @_;
      return ($part =~ /^\d$/ ? '0' . $part : $part); };

    # What if month or day is given, but not year ???
    # (the day is only used when a month is available)
    if (defined $month) {
      $isodate .= '-' . $pad->($month);
      if (defined $day) {
        $isodate .= '-' . $pad->($day); } }
    return (T_CS('\bib@field@default@date'), T_BEGIN, Tokenize($isodate)->unlist, T_END); });

# howpublished : if something non-standard
# stored as a note having role=publication.
DefMacro('\bib@field@default@howpublished',
  '\bib@@field{ltx:bib-note}[role=publication]');

#======================================================================
# Part info.

# All of the following generate an ltx:bib-part, distinguished by its role attribute.

# chapter : the chapter number (of a book, presumably inbook or similar)
DefMacro('\bib@field@default@chapter',
  '\bib@@field{ltx:bib-part}[role=chapter]');

# number : the number of a journal, magazine, or whatever
DefMacro('\bib@field@default@number',
  '\bib@@field{ltx:bib-part}[role=number]');

# volume : the volume of a journal or multi-volume work
DefMacro('\bib@field@default@volume',
  '\bib@@field{ltx:bib-part}[role=volume]');

# part : the part of a work
DefMacro('\bib@field@default@part',
  '\bib@@field{ltx:bib-part}[role=part]');

# series : the series of books a book was published in.
DefMacro('\bib@field@default@series',
  '\bib@@field{ltx:bib-part}[role=series]');

# pages : the page or page range
# The content is provided by \bib@@pages, which normalizes the dashes.
DefMacro('\bib@field@default@pages',
  '\bib@@field{ltx:bib-part}[role=pages]\bib@@pages');

# \bib@@pages{value}
# Produces the page range. The argument is read, but not used: instead, the
# string value of the current entry's pages field is fetched, each run of
# dashes is replaced by --, and the digested result is stored as the 'pages'
# property, which is what gets inserted.
DefConstructor('\bib@@pages{}', "#pages",
  afterDigest => sub {
    my $pages = currentBibEntryField('pages');
    $pages =~ s/-+/--/g;    # Force - to -- (ligature will turn into endash)
    $_[1]->setProperty(pages => Digest(Tokenize($pages))); });

#======================================================================
# The Standard BibTeX Fields

# annote : annotations; I'll assume annote is equivalent to note.
# (both generate an ltx:bib-note having role=annotation)
DefMacro('\bib@field@default@annote',
  '\bib@@field{ltx:bib-note}[role=annotation]');

# crossref : the key of a cross-referenced entry
# Recorded as the bibrefs attribute (the cleaned-up key) of an ltx:bib-related
# having role=host, which is created if not already present; no content is added.
DefConstructor('\bib@field@default@crossref Semiverbatim', sub {
    my ($document, $key) = @_;
    bibAddToContainer($document, 'ltx:bib-related', undef, role => 'host', bibrefs => CleanBibKey($key)); });

# key : a normally hidden field for ordering
DefConstructor('\bib@field@default@key Digested',
  "<ltx:bib-key>#1</ltx:bib-key>");

# note : miscellaneous annotation
DefMacro('\bib@field@default@note',
  '\bib@@field{ltx:bib-note}[role=annotation]');

# type : the type of a report
# (also used by \bib@addtype to supply a default type for some entry types)
DefConstructor('\bib@field@default@type Digested',
  "<ltx:bib-type>#1</ltx:bib-type>");

#======================================================================
# Non-Standard but Common (?) fields.

# affiliation : the author's affiliation
# IMPLEMENT THIS!
# Seems like this should be tucked into author (or editor...)?

# abstract : a copy of the abstract
DefMacro('\bib@field@default@abstract',
  '\bib@@field{ltx:bib-extract}[role=abstract]');

# NOTE: archive, eprint and preprint all generate a plain ltx:bib-links,
# with nothing to distinguish between them.

# archive : specifies where this is archived (URL?)
DefMacro('\bib@field@default@archive',
  '\bib@@field{ltx:bib-links}');    # add text? or role?

# contents : a table of contents
DefMacro('\bib@field@default@contents',
  '\bib@@field{ltx:bib-extract}[role=contents]');

# copyright : copyright information
# (stored as an ltx:bib-date having role=copyright)
DefMacro('\bib@field@default@copyright',
  '\bib@@field{ltx:bib-date}[role=copyright]');

# eprint : specifies an electronic publication; Is this a URL?
DefMacro('\bib@field@default@eprint',
  '\bib@@field{ltx:bib-links}');    # add text? or role?

# preprint : specifies an electronic publication (not a URL).
DefMacro('\bib@field@default@preprint',
  '\bib@@field{ltx:bib-links}');    # add text? or role?

# keywords : a list of subject keywords
DefMacro('\bib@field@default@keywords',
  '\bib@@field{ltx:bib-extract}[role=keywords]');

# language : the language a document is in.
DefMacro('\bib@field@default@language',
  '\bib@@field{ltx:bib-language}');

# url : specifies a URL where the work can be found.
# The url becomes the href attribute; the displayed text is always "Link".
DefConstructor('\bib@field@default@url Verbatim',
  "<ltx:bib-url href='#1'>Link</ltx:bib-url>");

# enote : miscellaneous annotation
DefMacro('\bib@field@default@enote',
  '\bib@@field{ltx:bib-note}[role=electronic-annotation]');

#======================================================================
# Interesting Identifiers
# doi : Document Object Identifier
DefConstructor('\bib@field@default@doi Semiverbatim',
  "<ltx:bib-identifier scheme='doi' id='#1' href='#href'>Document</ltx:bib-identifier>",
  properties => sub {
    my $id = processIdentifier($_[1]);
    # doi's can have all sorts of screwy characters which must be encoded in urls
    $id =~ s/([^0-9a-zA-Z\.\/\-\+])/ sprintf('%%%02X',ord($1)) /ge;
    (id => $id, href => "https://dx.doi.org/$id"); });

# ISBN : the International Standard Book Number
# Open question: is there any useful URL that could be associated here?
DefConstructor('\bib@field@default@isbn Semiverbatim',
  "<ltx:bib-identifier scheme='isbn' id='#id'>ISBN #1</ltx:bib-identifier>",
  properties => sub {
    my (undef, $isbn) = @_;
    # The id attribute gets the trimmed value; the text shows the argument as given.
    return (id => processIdentifier($isbn)); });

# ISSN : the International Standard Serial Number (eg. of a Journal)
# Open question: is there any useful URL that could be associated here?
DefConstructor('\bib@field@default@issn Semiverbatim',
  "<ltx:bib-identifier scheme='issn' id='#id'>ISSN #1</ltx:bib-identifier>",
  properties => sub {
    my (undef, $issn) = @_;
    # The id attribute gets the trimmed value; the text shows the argument as given.
    return (id => processIdentifier($issn)); });

# LCCN : the Library of Congress Control Number
DefConstructor('\bib@field@default@lccn Semiverbatim',
  "<ltx:bib-identifier scheme='lccn' id='#id'>LCCN #1</ltx:bib-identifier>",
  properties => sub {
    my (undef, $lccn) = @_;
    # The id attribute gets the trimmed value; the text shows the argument as given.
    return (id => processIdentifier($lccn)); });

# PII : the Publisher Item Identifier
DefConstructor('\bib@field@default@pii Semiverbatim',
  "<ltx:bib-identifier scheme='pii' id='#id'>PII #1</ltx:bib-identifier>",
  properties => sub {
    my (undef, $pii) = @_;
    # The id attribute gets the trimmed value; the text shows the argument as given.
    return (id => processIdentifier($pii)); });

# Normalize an identifier for use as an attribute value:
# an object (reference) is first converted with ToString, then leading
# and trailing whitespace is removed.  Returns the resulting string.
sub processIdentifier {
  my ($identifier) = @_;
  if (ref $identifier) {
    $identifier = ToString($identifier); }
  $identifier =~ s/^\s+|\s+$//g;    # trim both ends in one pass
  return $identifier; }

#======================================================================
# Reviews.

# review : the digested field content is placed inside ltx:bib-review,
# preceded by the literal label "Review ".
DefConstructor('\bib@field@default@review Digested',
  "<ltx:bib-review>Review #1</ltx:bib-review>");

# mrnumber is a common non-standard field
# mrreviewer isn't
# zblno, zblreviewer are analogous, but not common.
#
# I synthesize 1 field from mrnumber & mrreviewer (as done in DLMF)
# Similarly for zblno & zblreviewer.
DefMacro('\bib@synthesize@mr', sub {
    # Expands to \bib@@mr{<mrnumber>}{<mrreviewer>} for the current entry.
    my $number   = currentBibEntryField('mrnumber');
    my $reviewer = currentBibEntryField('mrreviewer');
    # Neither field present: contribute nothing.
    return () unless $number || $reviewer;
    # A missing reviewer is passed as an empty argument.
    return Invocation(T_CS('\bib@@mr'), Tokenize($number),
      ($reviewer ? Tokenize($reviewer) : Tokens())); });

# \bib@@mr{number}{reviewer} : the MathReviews data for an entry.
# It is emitted as an ltx:bib-review (naming the reviewer, when there is one)
# if the entry counts as a review, and as a plain ltx:bib-identifier otherwise.
DefConstructor('\bib@@mr {}{}',
  join('',
    "?#isreview",
    "(?#reviewer",
    "(<ltx:bib-review scheme='mr' id='#id' href='#href'>MathReview (#reviewer)</ltx:bib-review>)",
    "(<ltx:bib-review scheme='mr' id='#id' href='#href'>MathReview</ltx:bib-review>))",
    "(<ltx:bib-identifier scheme='mr' id='#id' href='#href'>MathReview Entry</ltx:bib-identifier>)"),
  properties => sub {
    my (undef, $number, $reviewer) = @_;
    my $id = ToString($number);
    $id =~ s/^\s+//;
    $id =~ s/\s+$//;
    # Having a reviewer is enough to make this a review ...
    my $isreview = (ToString($reviewer) ne '');
    # ... and so is a parenthesized part after the number;
    # in that case only the digits are kept as the id.
    if ($id =~ /^\s*(?:MR)?(\d+)\s+\(.*\)\s*$/) {
      ($id, $isreview) = ($1, 1); }
    return (isreview => $isreview, id => $id,
      href     => "https://www.ams.org/mathscinet-getitem?mr=" . $id,
      reviewer => $reviewer); });

DefMacro('\bib@synthesize@zbl', sub {
    # Expands to \bib@@zbl{<zblno>}{<zblreviewer>} for the current entry.
    my $number   = currentBibEntryField('zblno');
    my $reviewer = currentBibEntryField('zblreviewer');
    # Neither field present: contribute nothing.
    return () unless $number || $reviewer;
    # A missing reviewer is passed as an empty argument.
    return Invocation(T_CS('\bib@@zbl'), Tokenize($number),
      ($reviewer ? Tokenize($reviewer) : Tokens())); });

# \bib@@zbl{number}{reviewer} : the ZentralBlatt review for an entry,
# emitted as an ltx:bib-review that names the reviewer when there is one.
DefConstructor('\bib@@zbl {}{}',
  "?#reviewer"
    . "(<ltx:bib-review scheme='zbl' id='#id' href='#href'>ZentralBlatt (#reviewer)</ltx:bib-review>)"
    . "(<ltx:bib-review scheme='zbl' id='#id' href='#href'>ZentralBlatt</ltx:bib-review>)",
  properties => sub {
    my (undef, $number, $reviewer) = @_;
    # Trim surrounding whitespace, as \bib@@mr does for its number; otherwise
    # blanks around the field value end up in both the id and the URL.
    my $id   = processIdentifier(ToString($number));
    my $href = "https://zbmath.org/$id";
    return (id => $id, href => $href, reviewer => $reviewer); });

#======================================================================
# Extra fields used in DLMF (elsewhere?)

# links : the digested field content is placed, as is, inside ltx:bib-links.
DefConstructor('\bib@field@default@links Digested',
  "<ltx:bib-links>#1</ltx:bib-links>");

# This puts a copy of the BibTeX entry into the XML
# It is expanded for BibTeX data, but not TeX expanded.
# It seems that using bib-data (with role='self') is only slightly perverse.
DefConstructor('\bib@@origbibentry',
  "<ltx:bib-data role='self' type='BibTeX'>#bibentry</ltx:bib-data>",
  afterDigest => sub {
    my (undef, $whatsit) = @_;
    # Record the pretty-printed current entry, for the #bibentry slot of the template.
    $whatsit->setProperty(bibentry => currentBibEntry()->prettyPrint);
    return; });
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Processing BibTeX Name formats
# Multiple names are separated by " and " and may be ended by " and others".
# BibTeX distinguishes 4 name components: First, Last, von and Jr., in 3 formats:
#    First von Last
#    von Last, First
#    von Last, Jr, First
# I suppose BibTeX is more worried about "von" since it does all sorts of case mangling?
# We'll just combine "von Last" as Surname.
# This simplifies the processing of all but the first case.

# Parse the text of a BibTeX name list and return a list with one Tokens per name.
# Names are separated by the word "and"; a final "and others" (or "and etal")
# marks an abbreviated list and adds a last name whose surname is "others".
# Within a name, the phrases between commas select the format:
#    1 phrase         : First von Last
#    2 phrases        : von Last, First
#    3 or more phrases: von Last, Jr, First
# Each name invokes \bib@surname, \bib@given and \bib@lineage for whichever
# of those parts turned out non-empty.
# A name with no words at all is reported with a warning and skipped.
sub processBibNameList {
  my ($string) = @_;
  my @names    = ();
  my @words    = splitWords($string);
  my $etal     = 0;
  # NOTE: Should formally represent the etal???
  if ($words[-2] && ($words[-2] =~ /^and$/i) && ($words[-1] =~ /^(others|et\s*al\.?)$/i)) {
    pop(@words); pop(@words); $etal = 1; }
  while (@words) {
    # Collect the words of one name (up to the next "and"),
    # grouped into the phrases that its commas delimit.
    my @phrases = ();
    my @phrase  = ();
    while (@words) {
      my $word = shift(@words);
      last if $word =~ /^and$/i;
      if ($word eq ',') {
        push(@phrases, [@phrase]); @phrase = (); }
      else {
        push(@phrase, $word); } }
    push(@phrases, [@phrase]) if @phrase;
    # A dangling comma ("Smith," or "Smith, John, and others") leaves empty
    # phrases at the end; discard them, so that the name itself is kept
    # instead of being mistaken for an empty one.
    pop(@phrases) while @phrases && !@{ $phrases[-1] };
    if (!@phrases) {
      Warn('expected', "<bibname>", undef,
        "Empty name in bibliographic name list for " . currentBibKey());
      next; }

    my ($given, $surname, $jr);
    if (scalar(@phrases) >= 3) {    # von Last, Jr, First
      $surname = join(' ', @{ $phrases[0] });
      $jr      = join(' ', @{ $phrases[1] });
      # BibTeX itself disregards commas beyond the second one; likewise,
      # fold any further phrases into the given name rather than lose them.
      $given = join(' ', map { @$_ } @phrases[2 .. $#phrases]); }
    elsif (scalar(@phrases) == 2) {    # von Last, First
      $surname = join(' ', @{ $phrases[0] });
      $given   = join(' ', @{ $phrases[1] }); }
    else {                             # First von Last
      my @pwords = @{ $phrases[0] };
      my @first;
      # Leading words that do not start with a lowercase letter are given names;
      # the surname begins at the first lowercase ("von") word.
      while (@pwords && ($pwords[0] !~ /^[a-z]/)) {    # This case test is not correct!!!
        push(@first, shift(@pwords)); }
      # No "von" word at all: the last word is the surname.
      push(@pwords, pop(@first)) unless @pwords;
      $given   = join(' ', @first);
      $surname = join(' ', @pwords); }
    push(@names, Tokens(($surname ? Invocation(T_CS('\bib@surname'), Tokenize($surname)) : ()),
        ($given ? Invocation(T_CS('\bib@given'),   Tokenize($given)) : ()),
        ($jr    ? Invocation(T_CS('\bib@lineage'), Tokenize($jr))    : ())));
  }
  if ($etal) {
    push(@names, Invocation(T_CS('\bib@surname'), Tokenize('others'))); }
  return @names; }

# Split into space separated words, but ignore spaces within {}
# Hard spaces (~) separate words just like whitespace, unless escaped as \~.
# A comma outside of braces always ends the current word and is returned as
# a word "," of its own, whether or not whitespace follows it; commas that
# end the whole string separate nothing and are dropped.
# Returns the list of words (empty for undefined or blank input).
# Signals an Error for an unbalanced {, then skips that brace and carries on.
sub splitWords {
  my ($string) = @_;
  return () unless defined $string;
  $string =~ s/\\~/####/g;    # Protect \~ so we can split on hard spaces (~) as well as regular ones.
  $string =~ s/^[\s~]+//s;
  $string =~ s/%\n//gs;       # Cleanup linebreaks
  my @words = ();
  my $word  = '';
  # Test lengths rather than truth: "0" is a perfectly good word (or remainder).
  while (length($string)) {
    if ($string =~ s/^(?:(,)[\s~]*|[\s~]+)//s) {    # separator: comma and/or space
      push(@words, $word) if length($word);
      push(@words, ',')   if defined $1;
      $word = ''; }
    elsif ($string =~ s/^([^\{\s,~]+)//) {          # run of ordinary characters
      $word .= $1; }
    elsif ($string =~ /^\{/) {                      # braced group: taken whole
      # In scalar context extract_bracketed also removes the group from $string.
      my $t = extract_bracketed($string, '{}');
      if (!defined $t) {
        Error('expected', '}', $STATE->getStomach->getGullet,
          "Expected balancing } in bibliographic phrases");
        $string =~ s/^\{//; }
      else {
        $word .= $t; } }
    else {    # Should not happen, but make sure the loop always consumes something.
      $word .= substr($string, 0, 1, ''); } }
  push(@words, $word) if length($word);
  pop(@words) while @words && ($words[-1] eq ',');
  s/####/\\~/g for @words;    # Put any \~ back
  return @words; }

# Wrappers for the parts of a parsed name, as invoked by processBibNameList;
# each one puts its argument into the corresponding element.
DefConstructor('\bib@surname{}', "<ltx:surname>#1</ltx:surname>");
DefConstructor('\bib@given{}',   "<ltx:givenname>#1</ltx:givenname>");
DefConstructor('\bib@lineage{}', "<ltx:lineage>#1</ltx:lineage>");

#**********************************************************************
1;
